## "China's Ideological Spectrum"
## by Jennifer Pan and Yiqing Xu


### Install Packages
## install.packages("foreign")
## install.packages("lavaan")
## install.packages("Amelia")
## install.packages("psych")
## install.packages("doParallel","foreach")

###############################
## Table A5. Descriptive Stats
###############################


rm(list=ls(all=1))
library(foreign)
library(Hmisc)
d<-read.dta("data/abs_short.dta",convert.factors=FALSE)
head(d)
names(d)
covar <- c("female","age","mschool","hschool","college","rural")
describe(d[,covar])


#############################
## Multiple Imputation
#############################

### WARNING - this part takes fairly long processing time
### Results can be directly loaded from intermediate file (line 66)

## library(lavaan)
## library(Amelia)
## questions <- colnames(d)[2:37]

## ## multiple imputation
## set.seed(02139)
## d.mi <- amelia(d, m = 200,  idvars = "id",
##                ords = c(questions, c("rural","female","edu","income")))

## ## load data
## f1 <- c(74:79, 129, 131, 132, 139:148)
## f2 <- c(50:63, 151:153)

## ## a function that generates lavaan model syntax from a list of nubmers
## genModel <- function(model.list) {
##     model <- ""
##     for (j in 1:length(model.list)) {
##         model <- paste(model,
##                        paste('factor',j,' =~ ',
##                              paste(paste('q', model.list[[j]], sep=""),
##                                    collapse = " + "), sep = ""),
##                        "\n", sep = "") 
##     }
##     return(model)
## }
## save(d, d.mi, questions, genModel, f1, f2, file = "data/abs_mi.RData")


########################################
## Figure A5 (upper). PCA for ABS
########################################

rm(list=ls(all=1))
load("data/abs_mi.RData")
s <- d.mi$imputations[[1]]
pca.out<-prcomp(s[,questions], center=TRUE,scale=TRUE)
names(pca.out)
summary(pca.out)

library(psych)
out <- scree(s[,questions])
fv <- out$fv
pcv <- out$pcv

pdf("graphs/abs_scree.pdf")
par(mar = c(5, 4, 1, 1))
plot(1, type = "n", ylim = c(-0.3, 5.2), xlim = c(1,36),
     xlab = "", ylab = "", axes = FALSE)
mtext("i'th Principal Component or Factor", 1, 2.5, cex = 1.5)
mtext("Eigen Values of Components and Factors", 2, 2.5, cex = 1.5)
abline(h = 1, col = "#AAAAAA50", lwd = 4, lty = 1)
abline(h = seq(-1,20,0.25), col = "#AAAAAA50")
abline(v = seq(0,50,by=2), col = "#AAAAAA50")
lines(fv, col = "gray30", lwd = 2, lty = 3)
points(fv, pch = 16, col = "white", cex = 1.8)
lines(pcv, col = 1, lwd = 2)
points(pcv, pch = 16, col ="white", cex = 1.8) 
points(fv, col = "gray30", pch = 1, cex = 1.1)
points(pcv, pch = 16, col =1, cex = 1.2)
axis(1, at = seq(0,50,10), cex.axis = 1.3)
axis(2, cex.axis = 1.3)
legend("topright", c("Principal Component", "Factor"), pch = c(16, 1),
       col = c(1, "gray30"), bty = "n", cex = 1.5)
box()
graphics.off()

########################################
## Figure A4 (lower). CFA latent factors
########################################

library(lavaan)
mod <- list(f1, f2)
fit <- cfa(genModel(mod), data=s[,questions], std.lv = TRUE)
inspect(fit,"cov.lv")
lt <- lavPredict(fit)

pdf("graphs/abs_corr.pdf")
par(mar = c(5, 4, 1, 1))
plot(1, type = "n", xlab = "", ylab = "",
     xlim = c(-3.5,3.5), ylim = c(-3.5,3.5))
abline(h = seq(-4,4,0.25), col = "#AAAAAA50")
abline(v = seq(-4,4,0.25), col = "#AAAAAA50")
points(lt, cex = 0.6, pch = 16, col = "gray30")
text(0,3.2,paste("Corr =", round(cor(lt[,1],lt[,2]),3)), cex = 2, font = 3)
mtext("Latent Factor 1 (Political)", 1, 2.5, cex = 1.5)
mtext("Latent Factor 2 (Social and Economic)", 2, 2.5, cex = 1.5)
graphics.off()


#############################
## CFA analysis for ABS data
#############################

### WARNING - this part takes fairly long processing time
### Results can be directly loaded from intermediate file (line 196)

## ## function
## mean.boot <- function(X, insample = NULL, nboot = 1000, seed = 123) {
##     library(plyr)
##     set.seed(seed)
##     ## in sample
##     if (is.null(insample)==TRUE) {
##         insample <- 1:dim(d)[1]
##     }
##     n <- length(insample)
##     dd<-d[insample, ]
##     lt <- mi.out[insample,,]
##     ## bootstrap
##     ngroup<-length(table(dd[,X]))
##     result<-array(NA,dim = c(ngroup,2,nboot))
##     for (i in 1:nboot) {
##         smp <- sample(1:n,n,replace=TRUE) # which observation to sample
##         smp2 <- sample(1:dim(mi.out)[3],n,replace=TRUE) # which mi sample to use 
##         s<- dd[smp,]
##         s$lt1 <- s$lt2 <- NA
##         for (j in 1:n){
##             s[j,c("lt1","lt2")] <- lt[smp[j],,smp2[j]]
##         }
##         result[,1,i]<-c(by(s,s[,X], function(x) mean(x$lt1)))
##         result[,2,i]<-c(by(s,s[,X], function(x) mean(x$lt2)))
##         if (i%%50==0) cat(i) else cat(".")
##     }
##     out <- array(NA, dim = c(ngroup, 2, 3))
##     out[,1,] <- cbind(apply(result[,1,],1,mean), t(apply(result[,1,],1,quantile,c(0.025,0.975))))
##     out[,2,] <- cbind(apply(result[,2,],1,mean), t(apply(result[,2,],1,quantile,c(0.025,0.975))))
##     return(out)
## }

## ## estimation
## load("data/abs_mi.RData")
## nobs <- dim(d)[1]
## mi.out <- array(NA, dim = c(nobs, 2, 200))
## for (i in 1:200) {
##     s <- d.mi$imputations[[i]]
##     mod <- list(f1, f2)
##     fit <- cfa(genModel(mod), data=s[,questions], std.lv = TRUE)
##     mi.out[,,i] <- lavPredict(fit)
##     if (i%%10 == 0) {cat(i)} else {cat(".")}
## }
## save(mi.out, file = "output/result_abs_cfa.RData")

## ## bootstrap
## load("output/result_abs_cfa.RData")
## nboot <- 1000
## out.edu <- mean.boot(X = "edu", nboot = nboot)
## out.inc <- mean.boot(X = "income", nboot = nboot)
## out.age <- mean.boot(insample = which(d$age<=60), X = "age", nboot)
## out.edu.rural <- mean.boot(insample = which(d$rural == 1), X = "edu", nboot = nboot)
## out.edu.urban <- mean.boot(insample = which(d$rural == 0), X = "edu", nboot = nboot)
## out.inc.rural <- mean.boot(insample = which(d$rural == 1), X = "income", nboot = nboot)
## out.inc.urban <- mean.boot(insample = which(d$rural == 0), X = "income", nboot = nboot)
## out.age.rural <- mean.boot(insample = which(d$rural==1 & d$age<=60), X = "age", nboot)
## out.age.urban <- mean.boot(insample = which(d$rural==0 & d$age<=60), X = "age", nboot)

## save(out.edu, out.inc, out.age,
##      out.age.rural, out.age.urban,
##      file = "output/result_abs_boot.RData")

###############################
## plotting
###############################

rm(list=ls(all=1))
load("output/result_abs_boot.RData")
mycol = c(1, "gray50")
mypch = c(16, 17)
offset <- c(-0.1,0.1)

########################
## Figure 10 (lower)
########################

## education
pdf("graphs/abs_educ.pdf")
output <- out.edu
par(mar=c(4.5,4.5,2,2))
plot(1, type="n", ylab="Liberal-NonTraditional Values",
     xlab="", ylim=c(-1, 1), xlim=c(0.5,5.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=c(1,3,5), tick = FALSE, ,cex.axis=1.2,
     labels=c("< Primary School","Middle School","College or Abvoe"))
axis(1,at=c(2,4),labels=c("Primary School","High School"),
     cex.axis=1.2, line = 1.5, tick = FALSE)
axis(1,lab = FALSE, at = 1:5)
axis(2,cex.axis=1.5)
for(i in seq(-2,2,by=0.1)) abline(h=i,col="#AAAAAA80",lwd=1)
for(i in seq(-0.5,10.5,by=0.25)) abline(v=i,col="#AAAAAA80",lwd=1)
n<-dim(output)[1]
for (k in 1:2) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,2],output[i,k,3]),lwd=4, col = mycol[k])}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
}
legend("topleft", legend = c("Political Liberalism","Non-Traditional Values"),
       col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
box()
graphics.off()


## income
pdf("graphs/abs_income.pdf")
output <- out.inc
par(mar=c(4.5,4.5,2,2))
plot(1, type="n", ylab="Liberal-NonTraditional Values",
     xlab="Self-Reported Income Level",
     ylim=c(-1,1), xlim=c(0.5,5.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=1:5,labels=c("Lowest","Low","Middle","High","Highest"),cex.axis=1.5)
axis(2,cex.axis=1.5)
for(i in seq(-2,2,by=0.1)) abline(h=i,col="#AAAAAA80",lwd=1)
for(i in seq(-0.5,10.5,by=0.25)) abline(v=i,col="#AAAAAA80",lwd=1)
n<-dim(output)[1]
for (k in 1:2) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,2],output[i,k,3]),lwd=4, col = mycol[k])}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = mycol[k])
}
legend("topleft", legend = c("Political Liberalism","Non-Traditional Values"),
       col=mycol, pch=mypch, lty=1, bty = "n", lwd = 3, cex = 1.3)
graphics.off()

########################
## Figure 11 (lower)
########################

pdf("graphs/abs_age.pdf", width = 16, height=8)
par(mar=c(4.5,4.5,2,2), mfcol = c(1,2))
## political
output <- out.age
plot(1, type="n", ylab="",
     xlab="Age", ylim=c(-1,1.5), xlim=c(0.5,43.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=seq(1,43,by=3), lab=seq(18,60,by=3),cex.axis=1.5)
axis(2,cex.axis=1.5)
abline(h=seq(-2,2,by=0.25),col="#AAAAAA50",lwd=1)
abline(v=seq(-0.5,60.5,by=2),col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,2],output[i,k,3]),lwd=4, col = 1)}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = 1)
}
box();
title("Political Liberalism", line = -2, cex.main = 1.6)
## cultural
plot(1, type="n", ylab="",
     xlab="Age", ylim=c(-1,1.5), xlim=c(0.5,43.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=seq(1,43,by=3), lab=seq(18,60,by=3),cex.axis=1.5)
axis(2,cex.axis=1.5)
abline(h=seq(-2,2,by=0.25),col="#AAAAAA50",lwd=1)
abline(v=seq(-0.5,60.5,by=2),col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 2) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,2],output[i,k,3]),lwd=4, col = 1)}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = 1)
}
title("Non-Traditional Values", line = -2, cex.main = 1.6)
box()
graphics.off()

########################
## Figure A5
########################

## age
pdf("graphs/abs_age_urban.pdf", width = 16, height=8)
par(mar=c(4.5,4.5,2,2), mfcol = c(1,2))
## political
output <- out.age.urban
plot(1, type="n", ylab="",
     xlab="Age", ylim=c(-1,1.5), xlim=c(0.5,43.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=seq(1,43,by=3), lab=seq(18,60,by=3),cex.axis=1.5)
axis(2,cex.axis=1.5)
abline(h=seq(-2,2,by=0.25),col="#AAAAAA50",lwd=1)
abline(v=seq(-0.5,60.5,by=2),col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,2],output[i,k,3]),lwd=4, col = 1)}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = 1)
}
box();
title("Political Liberalism", line = -2, cex.main = 1.6)
## cultural
plot(1, type="n", ylab="",
     xlab="Age", ylim=c(-1,1.5), xlim=c(0.5,43.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=seq(1,43,by=3), lab=seq(18,60,by=3),cex.axis=1.5)
axis(2,cex.axis=1.5)
abline(h=seq(-2,2,by=0.25),col="#AAAAAA50",lwd=1)
abline(v=seq(-0.5,60.5,by=2),col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 2) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,2],output[i,k,3]),lwd=4, col = 1)}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = 1)
}
title("Non-Traditional Values", line = -2, cex.main = 1.6)
box()
graphics.off()


pdf("graphs/abs_age_rural.pdf", width = 16, height=8)
par(mar=c(4.5,4.5,2,2), mfcol = c(1,2))
## political
output <- out.age.rural
plot(1, type="n", ylab="",
     xlab="Age", ylim=c(-1,1.5), xlim=c(0.5,43.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=seq(1,43,by=3), lab=seq(18,60,by=3),cex.axis=1.5)
axis(2,cex.axis=1.5)
abline(h=seq(-2,2,by=0.25),col="#AAAAAA50",lwd=1)
abline(v=seq(-0.5,60.5,by=2),col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 1) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,2],output[i,k,3]),lwd=4, col = 1)}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = 1)
}
box();
title("Political Liberalism", line = -2, cex.main = 1.6)
## cultural
plot(1, type="n", ylab="",
     xlab="Age", ylim=c(-1,1.5), xlim=c(0.5,43.5),
     cex.lab=1.5, cex.axis=1.5, xaxt="n")
abline(h=0)
axis(1,at=seq(1,43,by=3), lab=seq(18,60,by=3),cex.axis=1.5)
axis(2,cex.axis=1.5)
abline(h=seq(-2,2,by=0.25),col="#AAAAAA50",lwd=1)
abline(v=seq(-0.5,60.5,by=2),col="#AAAAAA50",lwd=1)
n<-dim(output)[1]
for (k in 2) { # four latent variables
    for (i in 1:n) {lines(c(i,i) + offset[k],c(output[i,k,2],output[i,k,3]),lwd=4, col = 1)}
    points(c(1:n) + offset[k],output[,k,1],pch=mypch[k],cex=2, col = 1)
}
title("Non-Traditional Values", line = -2, cex.main = 1.6)
box()
graphics.off()


